clear
capture log close 
set more off 

/* DESCRIPTION:
	This file creates the following tables and figures for the appendix:

	*FIGURE B1: DECOMPOSITION RESULTS FOR BASIC MODEL
	*TABLE B1: IRP Decompositions: Separate mother and father models
	*TABLE B2: Parameters: Separate mother and father models
	*TABLE B3: IRP decompositions: Model with assortative mating
	*TABLE B4: Decomposition of IRP for average parent income measure (%)
	*TALBE B5: Parameters: Model with assortative mating	
*/

********************************************************************************
*** PREP DECOMP RESULTS FOR CREATING TABLES  - MAIN  MODEL
********************************************************************************

***** US
local yrmin1=1985 
local yrmax1=1995
local yrmin3=2008
local yrmax3=2018
** Main decomp model - mother/father IRP
clear
append using ${us_results}/model_5b_sequential_`yrmin1'_`yrmax1'.dta
//append using ${us_results}/model_5b_sequential_`yrmin2'_`yrmax2'.dta
append using ${us_results}/model_5b_sequential_`yrmin3'_`yrmax3'.dta
append using ${us_results}/model_5bF_sequential_`yrmin1'_`yrmax1'.dta
//append using ${us_results}/model_5bF_sequential_`yrmin2'_`yrmax2'.dta
append using ${us_results}/model_5bF_sequential_`yrmin3'_`yrmax3'.dta
order N Nkids Nmothers Nfathers, last
gen Period=.
replace Period=1 if yearmin==`yrmin1' & yearmax==`yrmax1'
//replace Period=2 if yearmin==`yrmin2' & yearmax==`yrmax2'
replace Period=3 if yearmin==`yrmin3' & yearmax==`yrmax3'
label define periodvals 1 "`yrmin1'-`yrmax1'" 2 "`yrmin2'-`yrmax2'" 3 "`yrmin3'-`yrmax3'", replace
label values Period periodvals 
** Decomposition  
*Decomposition elements specific to this model 
g b_Hp=((pi1*O_p)+pi2)*O_p*(vHp/vYp)	// own HC										-->	 1
g b_ep=pi1*(O_0p^2)*(vEta_ep/vYp)		// own (non-HC) employment						-->	 2
g b_p=pi1*(vEta_p/vYp)					// own residual yp								-->	 3
g b_HpHsp=O_p*d1*((O_s*pi1s)+pi2s)*(vHp/vYp) 	// sorting on HC 						-->  4
g b_EpyEspy=pi1s*(CovEpyEspy/vYp)				// sorting on residual income 			-->	 5A
g b_HpEspy=(O_p*pi1s*CovHpEspy/vYp) 			// sorting: own HC, spouse resid		-->	 5B (PART OF IT)
g b_EpyHsp=((pi1s*O_s)+pi2s)*(CovHspEpy/vYp) 	// sorting: spouse HC, own resid		-->	 5B (PART OF IT)
g b_sp=pi1*(O_0p^2)*(phi2^2)*(vEta_sp/vYp) 	// part of spouse HC orthogonal to own 		-->  5C
g b_4=b_HpHsp
g b_5=b_EpyEspy+b_HpEspy+b_EpyHsp+b_sp
gen B2=(b_Hp+b_ep+b_p+b_sp+b_HpHsp+b_HpEspy+b_EpyHsp+b_EpyEspy) // verify these sum to B
tab B B2 
drop B2
*Compute "parts" of calculated IGE due to H, E (non-H part), similar to simpler model without AM. (Excluding eta_sp because not sure where to put it.)
g BH=b_Hp
g BE=b_ep
g BU=b_p
*Compute percent of Parent Char due to H, E, U. Again, same as simpler model without AM (divide by BH+BE+BU rather than B, so percent of own char).
g pctH=100*BH/(BH+BE+BU)
g pctE=100*BE/(BH+BE+BU)
g pctU=100*BU/(BH+BE+BU)
*Break down Hp channel into part related to Employment and part orthogonal to it
g BHinclE=(((O_0p*(phi1+(phi2*d1)))*((pi1*(O_0p*(phi1+(phi2*d1))))+pi2)) + (2*pi1*O_1p*(O_0p*(phi1+(phi2*d1)))))*(vHp/vYp)
g BHexclE=(O_1p*((pi1*O_1p)+pi2))*(vHp/vYp)
g temp=BHinclE+BHexclE // verify sums to BH
tab BH temp 
drop temp
*Percents for these (divide by BH+BE+BU rather than B, so percent of own char)
g pctHinclE=100*BHinclE/(BH+BE+BU)
g pctHexclE=100*BHexclE/(BH+BE+BU)
* Aggregate spouse/AM component 
g B_ownchar=b_Hp+b_ep+b_p
g B_spousesort=b_sp+b_HpHsp+b_HpEspy+b_EpyHsp+b_EpyEspy
g pct_ownchar=100*B_ownchar/B
g pct_spousesort=100*B_spousesort/B
* Compute percents of AM for 4 and 5a, 5b, 5c in decomp 
g pct_EpyEspy=100*b_EpyEspy/B_spousesort
g pct_HpHsp=100*b_HpHsp/B_spousesort
g pct_HE=100*(b_HpEspy+b_EpyHsp)/B_spousesort
g pct_sp=100*b_sp/B_spousesort
g pct_4=100*b_4/(b_4+b_5)
g pct_5=100*b_5/(b_4+b_5)
*Save this, then do avg par data, combine and save below
tempfile model5_indivpar
save 	`model5_indivpar'

** Main decomp model - parent avg income IRP
clear 
append using ${us_results}/model_5bavg_sequential_`yrmin1'_`yrmax1'.dta
//append using ${us_results}/model_5bavg_sequential_`yrmin2'_`yrmax2'.dta
append using ${us_results}/model_5bavg_sequential_`yrmin3'_`yrmax3'.dta
gen Period=.
replace Period=1 if yearmin==`yrmin1' & yearmax==`yrmax1'
//replace Period=2 if yearmin==`yrmin2' & yearmax==`yrmax2'
replace Period=3 if yearmin==`yrmin3' & yearmax==`yrmax3'
label define periodvals 1 "`yrmin1'-`yrmax1'" 2 "`yrmin2'-`yrmax2'" 3 "`yrmin3'-`yrmax3'", replace
label values Period periodvals 

**** Decomposition  
*Decomposition elements specific to this model 
g b_Hp=.5*((pi1*O_p)+pi2)*O_p*(vHp/vYp)	// own HC (mother)
g b_ep=.5*pi1*(O_0p^2)*(vEta_ep/vYp)		// own (non-HC) employment
g b_p=.5*pi1*(vEta_p/vYp)					// own residual yp
g b_Hsp=.5*((pi1s*O_s)+pi2s)*O_s*(vHsp/vYp) // spouse HC (father)
g b_es=.5*pi1s*(O_0s^2)*(vEta_es/vYp)		// spouse (non-HC) employment
g b_s=.5*pi1s*(vEta_s/vYp)					// spouse residual ys
g b_sp=.5*pi1*(O_0p^2)*(phi2^2)*(vEta_sp/vYp) 	// part of spouse HC orthogonal to own HC -- PUT THIS WITH NON-HC AM
g b_ss=.5*pi1s*(O_0s^2)*(phi2s^2)*(vEta_ss/vYp) 	// part of own HC orthogonal to spouse HC -- PUT THIS WITH NON-HC AM
g b_HpHsp=.5*(O_p*d1*((O_s*pi1s)+pi2s)*(vHp/vYp)+((pi1*O_p)+pi2)*O_s*d1s*(vHsp/vYp)) 	// sorting on HC
g b_HpEspy=.5*((O_p*pi1s*CovHpEspy/vYp)+(((O_p*pi1)+pi2)*CovHpEspy/vYp)) 			// sorting: own HC, spouse resid
g b_EpyHsp=.5*(((pi1s*O_s)+pi2s)*(CovHspEpy/vYp)+(pi1*O_s)*(CovHspEpy/vYp)) 	// sorting: spouse HC, own resid
g b_EpyEspy=.5*(pi1s*(CovEpyEspy/vYp)+(pi1*CovEpyEspy/vYp))				// sorting on residual income
*Mother char / Father char / Assort Mating
g b_Moth= (b_Hp+b_ep+b_p)
g b_Fath=b_Hsp+b_es+b_s 
g b_AM=b_sp+b_ss+b_HpHsp+b_HpEspy+b_EpyHsp+b_EpyEspy
g b_AMhc=b_HpHsp
g b_AMeta=b_sp+b_ss+b_HpEspy+b_EpyHsp+b_EpyEspy
*Rescale using var(ybar_p)/var(y_c) 	/* Note: SD of child rank vars taken from August 9, 2022 decomp log files */
g sd_paravgrank=sqrt(vYp)
g sd_childrank=.
replace sd_childrank=27.72351 if yearmin==`yrmin1' & yearmax==`yrmax1'
//replace sd_childrank=28.57838 if yearmin==`yrmin2' & yearmax==`yrmax2'
replace sd_childrank=29.68365 if yearmin==`yrmin3' & yearmax==`yrmax3'
g rescale_ratio=sd_paravgrank/sd_childrank
label var rescale_ratio "rescale factor= sd(ybar_p)/sd(y_c); converts IRP to correlation, same rescale to all underlying factors"
foreach var of varlist B b_* {
	replace `var'=`var'*rescale_ratio
}
*Percentage contributions
g pct_Moth=100*b_Moth/B
g pct_Fath=100*b_Fath/B 
g pct_AM=100*b_AM/B 
g pct_AMhc=100*b_AMhc/B
g pct_AMeta=100*b_AMeta/B
g B2=b_Moth+b_Fath+b_AM // verify adds to B
tab B B2 
drop B2 
*Compute "parts" of calculated IGE due to H, E (non-H part). These are same as simpler model without AM. (Excluding eta_sp because not sure where to put it.)
g BH=b_Hp
g BE=b_ep
g BU=b_p
*Compute fraction of IGE due to H, E, U. Again, same as simpler model without AM.
g pctH=100*BH/(BH+BE+BU)
g pctE=100*BE/(BH+BE+BU)
g pctU=100*BU/(BH+BE+BU)
*Save this, then combine with indiv par, clean and save combined model 5 file
tempfile model5_avgpar
save 	`model5_avgpar'

*clean and combine individual/avg files
*** Formatting / Prepare for Tables
clear 
use `model5_indivpar'
append using `model5_avgpar'
*Round numbers to nearest .001 
ds modelnum model_descr Period parent N*, not
local varstoround `r(varlist)'
foreach var in `varstoround' {
	replace `var'=round(`var',.001)
}
*Round percents to nearest .1 
foreach var of varlist pct* {
	replace `var'=round(`var',.1)
}
drop model_descr 
order modelnum Period IGE B, first
order N Nkids Nmothers Nfathers, last
	*Reshape dataset multiple times to get in order for table
	sort modelnum Period
	gen tempid=_n
	*Get list of variable names to present in table (parameters, percents, b's)
	ds tempid modelnum parent Period , not
	local vars `r(varlist)'
	local Nvars: word count `vars'
	*Rename variables for reshape
	forv i=1/`Nvars' {
	rename `:word `i' of `vars'' V_`i'
	}
	*Reshape long
	reshape long V_, i(tempid) j(varnum)
	*Store variable names
	gen _varname=""
	forv i=1/`Nvars' {
	replace _varname="`:word `i' of `vars''" if varnum==`i' 
	}
	*Reshape wide to get column for each time period 
	drop tempid
	reshape wide V_, i(varnum modelnum) j(Period)
	format V_* %12.3f
	forv t=1(2)3 {
	rename V_`t' V_`yrmin`t''_`yrmax`t''
	}
	sort modelnum varnum
*SAVE TABLE DATA
order varnum _varname, first
save ${us_results}/tables_model5_US.dta, replace




***** SWEDEN
local yrmax3=2019
** Main decomp model - mother/father IRP
clear
append using ${swe_results}/SE_model_5b_sequential_`yrmin1'_`yrmax1'.dta
//append using ${swe_results}/SE_model_5b_sequential_`yrmin2'_`yrmax2'.dta
append using ${swe_results}/SE_model_5b_sequential_`yrmin3'_`yrmax3'.dta
append using ${swe_results}/SE_model_5bF_seq_`yrmin1'_`yrmax1'.dta
//append using ${swe_results}/SE_model_5bF_seq_`yrmin2'_`yrmax2'.dta
append using ${swe_results}/SE_model_5bF_seq_`yrmin3'_`yrmax3'.dta
order N Nkids Nmothers Nfathers, last
gen Period=.
replace Period=1 if yearmin==`yrmin1' & yearmax==`yrmax1'
//replace Period=2 if yearmin==`yrmin2' & yearmax==`yrmax2'
replace Period=3 if yearmin==`yrmin3' & yearmax==`yrmax3'
label define periodvals 1 "`yrmin1'-`yrmax1'" 2 "`yrmin2'-`yrmax2'" 3 "`yrmin3'-`yrmax3'", replace
label values Period periodvals 
** Decomposition  
*Decomposition elements specific to this model 
g b_Hp=((pi1*O_p)+pi2)*O_p*(vHp/vYp)	// own HC										-->	 1
g b_ep=pi1*(O_0p^2)*(vEta_ep/vYp)		// own (non-HC) employment						-->	 2
g b_p=pi1*(vEta_p/vYp)					// own residual yp								-->	 3
g b_HpHsp=O_p*d1*((O_s*pi1s)+pi2s)*(vHp/vYp) 	// sorting on HC 						-->  4
g b_EpyEspy=pi1s*(CovEpyEspy/vYp)				// sorting on residual income 			-->	 5A
g b_HpEspy=(O_p*pi1s*CovHpEspy/vYp) 			// sorting: own HC, spouse resid		-->	 5B (PART OF IT)
g b_EpyHsp=((pi1s*O_s)+pi2s)*(CovHspEpy/vYp) 	// sorting: spouse HC, own resid		-->	 5B (PART OF IT)
g b_sp=pi1*(O_0p^2)*(phi2^2)*(vEta_sp/vYp) 	// part of spouse HC orthogonal to own 		-->  5C
g b_4=b_HpHsp
g b_5=b_EpyEspy+b_HpEspy+b_EpyHsp+b_sp
gen B2=(b_Hp+b_ep+b_p+b_sp+b_HpHsp+b_HpEspy+b_EpyHsp+b_EpyEspy) // verify these sum to B
tab B B2 
drop B2 
*Compute "parts" of calculated IGE due to H, E (non-H part), similar to simpler model without AM. (Excluding eta_sp because not sure where to put it.)
g BH=b_Hp
g BE=b_ep
g BU=b_p
*Compute percent of Parent Char due to H, E, U. Again, same as simpler model without AM (divide by BH+BE+BU rather than B, so percent of own char).
g pctH=100*BH/(BH+BE+BU)
g pctE=100*BE/(BH+BE+BU)
g pctU=100*BU/(BH+BE+BU)
*Break down Hp channel into part related to Employment and part orthogonal to it
g BHinclE=(((O_0p*(phi1+(phi2*d1)))*((pi1*(O_0p*(phi1+(phi2*d1))))+pi2)) + (2*pi1*O_1p*(O_0p*(phi1+(phi2*d1)))))*(vHp/vYp)
g BHexclE=(O_1p*((pi1*O_1p)+pi2))*(vHp/vYp)
g temp=BHinclE+BHexclE // verify sums to BH
tab BH temp 
drop temp
*Percents for these (divide by BH+BE+BU rather than B, so percent of own char)
g pctHinclE=100*BHinclE/(BH+BE+BU)
g pctHexclE=100*BHexclE/(BH+BE+BU)
* Aggregate spouse/AM component 
g B_ownchar=b_Hp+b_ep+b_p
g B_spousesort=b_sp+b_HpHsp+b_HpEspy+b_EpyHsp+b_EpyEspy
g pct_ownchar=100*B_ownchar/B
g pct_spousesort=100*B_spousesort/B
* Compute percents of AM for 4 and 5a, 5b, 5c in decomp 
g pct_sp=100*b_sp/B_spousesort
g pct_HpHsp=100*b_HpHsp/B_spousesort
g pct_HE=100*(b_HpEspy+b_EpyHsp)/B_spousesort
g pct_EpyEspy=100*b_EpyEspy/B_spousesort
g pct_4=100*b_4/(b_4+b_5)
g pct_5=100*b_5/(b_4+b_5)
*Save this, then do avg par data, combine and save below
tempfile model5_indivparSE
save 	`model5_indivparSE'

** Main decomp model - parent avg income IRP
clear 
append using ${swe_results}/SE_model_5bavg_seq_`yrmin1'_`yrmax1'.dta
//append using ${swe_results}/SE_model_5bavg_seq_`yrmin2'_`yrmax2'.dta
append using ${swe_results}/SE_model_5bavg_seq_`yrmin3'_`yrmax3'.dta
gen Period=.
replace Period=1 if yearmin==`yrmin1' & yearmax==`yrmax1'
//replace Period=2 if yearmin==`yrmin2' & yearmax==`yrmax2'
replace Period=3 if yearmin==`yrmin3' & yearmax==`yrmax3'
label define periodvals 1 "`yrmin1'-`yrmax1'" 2 "`yrmin2'-`yrmax2'" 3 "`yrmin3'-`yrmax3'", replace
label values Period periodvals 
** Decomposition  
*Decomposition elements specific to this model 
g b_Hp=.5*((pi1*O_p)+pi2)*O_p*(vHp/vYp)	// own HC (mother)
g b_ep=.5*pi1*(O_0p^2)*(vEta_ep/vYp)		// own (non-HC) employment
g b_p=.5*pi1*(vEta_p/vYp)					// own residual yp
g b_Hsp=.5*((pi1s*O_s)+pi2s)*O_s*(vHsp/vYp) // spouse HC (father)
g b_es=.5*pi1s*(O_0s^2)*(vEta_es/vYp)		// spouse (non-HC) employment
g b_s=.5*pi1s*(vEta_s/vYp)					// spouse residual ys
g b_sp=.5*pi1*(O_0p^2)*(phi2^2)*(vEta_sp/vYp) 	// part of spouse HC orthogonal to own HC -- PUT THIS WITH NON-HC AM
g b_ss=.5*pi1s*(O_0s^2)*(phi2s^2)*(vEta_ss/vYp) 	// part of own HC orthogonal to spouse HC -- PUT THIS WITH NON-HC AM
g b_HpHsp=.5*(O_p*d1*((O_s*pi1s)+pi2s)*(vHp/vYp)+((pi1*O_p)+pi2)*O_s*d1s*(vHsp/vYp)) 	// sorting on HC
g b_HpEspy=.5*((O_p*pi1s*CovHpEspy/vYp)+(((O_p*pi1)+pi2)*CovHpEspy/vYp)) 			// sorting: own HC, spouse resid
g b_EpyHsp=.5*(((pi1s*O_s)+pi2s)*(CovHspEpy/vYp)+(pi1*O_s)*(CovHspEpy/vYp)) 	// sorting: spouse HC, own resid
g b_EpyEspy=.5*(pi1s*(CovEpyEspy/vYp)+(pi1*CovEpyEspy/vYp))				// sorting on residual income
*Mother char / Father char / Assort Mating
g b_Moth= (b_Hp+b_ep+b_p)
g b_Fath=b_Hsp+b_es+b_s 
g b_AM=b_sp+b_ss+b_HpHsp+b_HpEspy+b_EpyHsp+b_EpyEspy
g b_AMhc=b_HpHsp
g b_AMeta=b_sp+b_ss+b_HpEspy+b_EpyHsp+b_EpyEspy
*Rescale using var(ybar_p)/var(y_c) 	/* Note: SD of child rank vars taken from August 8, 2022 decomp log files */
g sd_paravgrank=sqrt(vYp)
g sd_childrank=.
replace sd_childrank=22.61855 if yearmin==`yrmin1' & yearmax==`yrmax1'
//replace sd_childrank=26.24539 if yearmin==`yrmin2' & yearmax==`yrmax2'
replace sd_childrank=27.80385 if yearmin==`yrmin3' & yearmax==`yrmax3'
g rescale_ratio=sd_paravgrank/sd_childrank
label var rescale_ratio "rescale factor= sd(ybar_p)/sd(y_c); converts IRP to correlation, same rescale to all underlying factors"
foreach var of varlist B b_* {
	replace `var'=`var'*rescale_ratio
}
*Percentage contributions
g pct_Moth=100*b_Moth/B
g pct_Fath=100*b_Fath/B 
g pct_AM=100*b_AM/B 
g pct_AMhc=100*b_AMhc/B
g pct_AMeta=100*b_AMeta/B
*Compute "parts" of calculated IGE due to H, E (non-H part). These are same as simpler model without AM. (Excluding eta_sp because not sure where to put it.)
g BH=b_Hp
g BE=b_ep
g BU=b_p
*Compute fraction of IGE due to H, E, U. Again, same as simpler model without AM.
g pctH=100*BH/(BH+BE+BU)
g pctE=100*BE/(BH+BE+BU)
g pctU=100*BU/(BH+BE+BU)
*Save this, then combine with indiv par, clean and save combined model 5 file
tempfile model5_avgparSE
save 	`model5_avgparSE'

** clean and combine individual/avg files
*** Formatting / Prepare for Tables
clear 
use `model5_indivparSE'
append using `model5_avgparSE'
*Round numbers to nearest .001 
ds modelnum model_descr Period parent N*, not
local varstoround `r(varlist)'
foreach var in `varstoround' {
	replace `var'=round(`var',.001)
}
*Round percents to nearest .1 
foreach var of varlist pct* {
	replace `var'=round(`var',.1)
}
drop model_descr 
order modelnum Period IGE B, first
order N Nkids Nmothers Nfathers, last
	*Reshape dataset multiple times to get in order for table
	sort modelnum Period
	gen tempid=_n
	*Get list of variable names to present in table (parameters, percents, b's)
	ds tempid modelnum parent Period , not
	local vars `r(varlist)'
	local Nvars: word count `vars'
	*Rename variables for reshape
	forv i=1/`Nvars' {
	rename `:word `i' of `vars'' V_`i'
	}
	*Reshape long
	reshape long V_, i(tempid) j(varnum)
	*Store variable names
	gen _varname=""
	forv i=1/`Nvars' {
	replace _varname="`:word `i' of `vars''" if varnum==`i' 
	}
	*Reshape wide to get column for each time period 
	drop tempid
	reshape wide V_, i(varnum modelnum) j(Period)
	format V_* %12.3f
	forv t=1(2)3 {
	rename V_`t' V_`yrmin`t''_`yrmax`t''
	}
	sort modelnum varnum
*SAVE TABLE DATA
order varnum _varname, first
save ${swe_results}/tables_model5_SE.dta, replace






********************************************************************************
*** PREP DECOMP RESULTS FOR CREATING TABLES AND FIGURES - APPENDIX  MODEL
********************************************************************************

*********
local yrmax3=2018
*** US model for individual parents
clear
append using ${us_results}/model_2b_sequential_`yrmin1'_`yrmax1'.dta
append using ${us_results}/model_2b_sequential_`yrmin3'_`yrmax3'.dta
append using ${us_results}/model_2bF_sequential_`yrmin1'_`yrmax1'.dta
append using ${us_results}/model_2bF_sequential_`yrmin3'_`yrmax3'.dta
order N Nkids Nmothers Nfathers, last
gen Period=.
replace Period=1 if yearmin==`yrmin1' & yearmax==`yrmax1'
replace Period=3 if yearmin==`yrmin3' & yearmax==`yrmax3'
label define periodvals 1 "`yrmin1'-`yrmax1'" 3 "`yrmin3'-`yrmax3'", replace
label values Period periodvals 

**** Decomposition   
* Compute "parts" of calculated IGE due to H, E (non-H part), and U 
g BH=(pi1+(pi2/O_p))*(O_p^2)*(vHp/vYp)
g BE=pi1*(O_0p^2)*(vEta_ep/vYp)
g BU=pi1*(vEta_p/vYp) 
g temp=BH+BE+BU // verify sums to B 
tab temp B
drop temp
* Compute percent of IGE due to H, E, U 
g pctH=100*BH/B
g pctE=100*BE/B
g pctU=100*BU/B
g temp=pctH+pctE+pctU //verify sums to 100
tab temp modelnum
drop temp
* We can further disagregate the part due to H into the parts that include/exclude the employment channel:
g BHinclE=O_0p*phi*((pi1*((O_0p*phi)+(2*O_1p)))+pi2)*(vHp/vYp)
g BHexclE=O_1p*((pi1*O_1p)+pi2)*(vHp/vYp)
g temp=BHinclE+BHexclE // verify sums to BH
bysort modelnum yearmin: su BH temp 
drop temp
*Percents for these 
g pctHinclE=100*BHinclE/B
g pctHexclE=100*BHexclE/B

*** Formatting / Prepare for Tables
*Round numbers to nearest .001 
ds modelnum model_descr Period N*, not
local varstoround `r(varlist)'
foreach var in `varstoround' {
	replace `var'=round(`var',.001)
}
*Round percents to nearest .1 
foreach var of varlist pct* {
	replace `var'=round(`var',.1)
}
drop model_descr 
order modelnum Period IGE B phi O_0p O_1p O_p pi1 pi2 vHp vEta_ep vEta_p vYp, first
order N Nkids Nmothers Nfathers yearmin yearmax, last
	*Reshape dataset multiple times to get in order for table
	sort modelnum Period
	gen tempid=_n
	*Get list of variable names to present in table (parameters, percents, b's)
	ds tempid modelnum Period , not
	local vars `r(varlist)'
	local Nvars: word count `vars'
	*Rename variables for reshape
	forv i=1/`Nvars' {
	rename `:word `i' of `vars'' V_`i'
	}
	*Reshape long
	reshape long V_, i(tempid) j(varnum)
	*Store variable names
	gen _varname=""
	forv i=1/`Nvars' {
	replace _varname="`:word `i' of `vars''" if varnum==`i' 
	}
	*Reshape wide to get column for each time period 
	drop tempid
	reshape wide V_, i(varnum modelnum) j(Period)
	format V_* %12.3f
	forv t=1(2)3 {
	rename V_`t' V_`yrmin`t''_`yrmax`t''
	}
	sort modelnum varnum
*SAVE TABLE DATA
order varnum _varname, first
save ${us_results}/tables_model2_US.dta, replace

*********
*** SWE model for individual parents
local yrmax3=2019
clear
append using ${swe_results}/SE_model_2b_seq_`yrmin1'_`yrmax1'.dta
append using ${swe_results}/SE_model_2b_seq_`yrmin3'_`yrmax3'.dta
append using ${swe_results}/SE_model_2bF_seq_`yrmin1'_`yrmax1'.dta
append using ${swe_results}/SE_model_2bF_seq_`yrmin3'_`yrmax3'.dta
order N Nkids Nmothers Nfathers, last
gen Period=.
replace Period=1 if yearmin==`yrmin1' & yearmax==`yrmax1'
replace Period=3 if yearmin==`yrmin3' & yearmax==`yrmax3'
label define periodvals 1 "`yrmin1'-`yrmax1'" 3 "`yrmin3'-`yrmax3'", replace
label values Period periodvals 
**** Decomposition   
* Compute "parts" of calculated IGE due to H, E (non-H part), and U 
g BH=(pi1+(pi2/O_p))*(O_p^2)*(vHp/vYp)
g BE=pi1*(O_0p^2)*(vEta_ep/vYp)
g BU=pi1*(vEta_p/vYp) 
g temp=BH+BE+BU // verify sums to B 
tab temp B
drop temp
* Compute percent of IGE due to H, E, U 
g pctH=100*BH/B
g pctE=100*BE/B
g pctU=100*BU/B
g temp=pctH+pctE+pctU //verify sums to 100
tab temp modelnum
drop temp
* We can further disagregate the part due to H into the parts that include/exclude the employment channel:
g BHinclE=O_0p*phi*((pi1*((O_0p*phi)+(2*O_1p)))+pi2)*(vHp/vYp)
g BHexclE=O_1p*((pi1*O_1p)+pi2)*(vHp/vYp)
g temp=BHinclE+BHexclE // verify sums to BH
bysort modelnum yearmin: su BH temp 
drop temp 
*Percents for these 
g pctHinclE=100*BHinclE/B
g pctHexclE=100*BHexclE/B
*** Formatting / Prepare for Tables
*Round numbers to nearest .001 
ds modelnum model_descr Period N*, not
local varstoround `r(varlist)'
foreach var in `varstoround' {
	replace `var'=round(`var',.001)
}
*Round percents to nearest .1 
foreach var of varlist pct* {
	replace `var'=round(`var',.1)
}
drop model_descr 
order modelnum Period IGE B phi O_0p O_1p O_p pi1 pi2 vHp vEta_ep vEta_p vYp, first
order N Nkids Nmothers Nfathers yearmin yearmax, last
	*Reshape dataset multiple times to get in order for table
	sort modelnum Period
	gen tempid=_n
	*Get list of variable names to present in table (parameters, percents, b's)
	ds tempid modelnum Period , not
	local vars `r(varlist)'
	local Nvars: word count `vars'
	*Rename variables for reshape
	forv i=1/`Nvars' {
	rename `:word `i' of `vars'' V_`i'
	}
	*Reshape long
	reshape long V_, i(tempid) j(varnum)
	*Store variable names
	gen _varname=""
	forv i=1/`Nvars' {
	replace _varname="`:word `i' of `vars''" if varnum==`i' 
	}
	*Reshape wide to get column for each time period 
	drop tempid
	reshape wide V_, i(varnum modelnum) j(Period)
	format V_* %12.3f
	forv t=1(2)3 {
	rename V_`t' V_`yrmin`t''_`yrmax`t''
	}
	sort modelnum varnum
*SAVE TABLE DATA
order varnum _varname, first
save ${swe_results}/tables_model2_SE.dta, replace







********************************************************************************
**** CREATE FIGURES
********************************************************************************

*FIGURE B1: DECOMPOSITION RESULTS FOR BASIC MODEL
clear
local yrmin1=1985 
local yrmax1=1995
local yrmin3=2008
local yrmax3=2018
append using ${us_results}/model_2b_sequential_`yrmin1'_`yrmax1'.dta
append using ${us_results}/model_2b_sequential_`yrmin3'_`yrmax3'.dta
append using ${us_results}/model_2bF_sequential_`yrmin1'_`yrmax1'.dta
append using ${us_results}/model_2bF_sequential_`yrmin3'_`yrmax3'.dta
gen country=2
gen country_name="US"
local yrmax3=2019
append using ${swe_results}/SE_model_2b_seq_`yrmin1'_`yrmax1'.dta
append using ${swe_results}/SE_model_2b_seq_`yrmin3'_`yrmax3'.dta
append using ${swe_results}/SE_model_2bF_seq_`yrmin1'_`yrmax1'.dta
append using ${swe_results}/SE_model_2bF_seq_`yrmin3'_`yrmax3'.dta
replace country=1 if country==.
replace country_name="Sweden" if country_name==""
*Decomposition
* Compute "parts" of calculated IGE due to H, E (non-H part), and U 
g BH=(pi1+(pi2/O_p))*(O_p^2)*(vHp/vYp)
g BE=pi1*(O_0p^2)*(vEta_ep/vYp)
g BU=pi1*(vEta_p/vYp) 
* We can further disagregate the part due to H into the parts that include/exclude the employment channel:
g BHinclE=O_0p*phi*((pi1*((O_0p*phi)+(2*O_1p)))+pi2)*(vHp/vYp)
g BHexclE=O_1p*((pi1*O_1p)+pi2)*(vHp/vYp)
* Time period 
gen period=.
replace period=1 if yearmin==`yrmin1'
replace period=3 if yearmin==`yrmin3'
label define periodvals 1 "1985-1995" 3 "2008-2019"
label values period periodvals
label var BE "Employment"
label var BH "Human capital"
label var BU "Resid income"
label var BHexclE "Human capital"
label var BHinclE "Employment sorting"
*FIGURE B1a - Mothers - with detailed HC channels
graph bar BHexclE BHinclE BE BU if modelnum=="2b", over(country_name) over(period) stack ///
	bar(1, color(red%30)) bar(2, color(red%80)) ///
	bar(3, color(cranberry%100)) bar(4, color(maroon%70)) ///
	ytitle("IRP") graphr(fc(white) c(white))  /*ysc(r(0 0.15)) */ ylab( , nogrid) legend(ring(0) pos(10) cols(1) order(1 2 3 4) label(1 "Human capital") label(2 "HC-Employment sorting") label(3 "Empoyment") label(4 "Residual Income") region(lstyle(none))) graphr(fc(white) c(white)) saving(${tabfig}/figB1a, replace)	
graph export ${tabfig}/figB1a.pdf, replace	
*FIGURE B1b Fathers - with detailed HC channels
graph bar BHexclE BHinclE BE BU if modelnum=="2bF", over(country_name) over(period) stack ///
	bar(1, color(ltblue%40)) bar(2, color(eltblue%70)) ///
	bar(3, color(navy)) bar(4, color(ebblue)) ///
	ytitle("IRP") graphr(fc(white) c(white))  /*ysc(r(0 0.35)) */ ylab( , nogrid)  legend(ring(0) pos(10) cols(1) order(1 2 3 4) label(1 "Human capital") label(2 "HC-Employment sorting") label(3 "Empoyment") label(4 "Residual Income") region(lstyle(none))) graphr(fc(white) c(white)) 	saving(${tabfig}/figB1b, replace)	
graph export ${tabfig}/figB1b.pdf, replace


********************************************************************************
**** CREATE TABLES
********************************************************************************

*TABLES B1 and B2: Decomposition and Paramaters for Separate mother and father models
use ${swe_results}/tables_model2_SE.dta, clear
renvars , postfix(SE)
rename (varnumSE _varnameSE modelnumSE) (varnum _varname modelnum)
merge 1:1 varnum _varname modelnum using ${us_results}/tables_model2_US.dta, gen(merge) 
drop merge
sort modelnum varnum
*Keep early and late periods (omit middle) to save space 
//drop Mothers_1996* Fathers_1996*
//drop V_1996*
*Allow for full sample sizes to be displayed
format *SE %15.3f
*TABLE B1: IRP Decompositions: Separate mother and father models
preserve 
keep if substr(_varname,1,1)=="B" | inlist(_varname,"Nmothers","Nfathers","Nkids","N")
local i=0
gen tempsort=.
foreach xx in B BH BE BU BHexclE BHinclE  Nmothers Nfathers Nkids N {
	local i=`i'+1
	replace tempsort=`i' if _varname=="`xx'"
}
replace _varname="Mothers: IRP" if modelnum=="2b" & _varname=="B"
replace _varname="Fathers: IRP" if modelnum=="2bF" & _varname=="B"
sort modelnum tempsort
texsave  _varname V_* using ${tabfig}/tableB1.tex, align(l r r r r ) replace  title(IRP Decompositions: Separate Mother and Father models) 
restore 
*TABLE B2: Parameters: Separate mother and father models
preserve 
drop if substr(_varname,1,1)=="b"
drop if substr(_varname,1,1)=="B"
drop if substr(_varname,1,3)=="pct"
drop if inlist(_varname,"yearmin","yearmax")
replace _varname="Mothers: IRP" if modelnum=="2b" & _varname=="IGE"
replace _varname="Fathers: IRP" if modelnum=="2bF" & _varname=="IGE"
texsave _varname V_* using ${tabfig}/tableB2.tex, align(l r r r r ) replace  title(Parameters: Separate Mother and Father models) 
restore



*TABLES B3 and B5: IRP decompositions and parameters for Model with assortative mating
use ${swe_results}/tables_model5_SE.dta, clear
renvars , postfix(SE)
rename (varnumSE _varnameSE modelnumSE) (varnum _varname modelnum)
merge 1:1 varnum _varname modelnum using ${us_results}/tables_model5_US.dta, gen(merge) 
sort modelnum varnum
*Keep early and late periods (omit middle) to save space 
//drop Mothers_1996* Fathers_1996* 
//drop *1996*
*Drop variables and lines for MF Avg model 
//drop MF*
drop if modelnum=="5bavg"
drop if V_1985_1995SE==.
drop if V_1985_1995==.
tab merge 
drop merge
drop parent*
*Allow for full sample sizes to be displayed
format *SE %15.3f
*TABLE B3: IRP decompositions: Model with assortative mating
preserve 
keep if substr(_varname,1,1)=="B" | substr(_varname,1,1)=="b" | inlist(_varname,"Nmothers","Nfathers","Nkids","N")
drop if inlist(_varname,"b_Hp","b_ep","b_p","b_HpHsp","b_HpEspy","b_EpyHsp","b_sp","b_EpyEspy")
local i=0
gen tempsort=.
foreach xx in B B_ownchar BH BE BU BHexclE BHinclE B_spousesort b_4 b_5 Nmothers Nfathers Nkids N {
	local i=`i'+1
	replace tempsort=`i' if _varname=="`xx'"
}
replace _varname=subinstr(_varname,"_","",.)
replace _varname="Mothers: IRP" if modelnum=="5b" & _varname=="B"
replace _varname="Fathers: IRP" if modelnum=="5bF" & _varname=="B"
replace _varname="bfour" if _varname=="b_4"
replace _varname="bfive" if _varname=="b_5"
sort modelnum tempsort
texsave  _varname V_* using ${tabfig}/tableB3.tex, align(l r r r r ) replace  title(IRP Decompositions: Separate Mother and Father models) 
restore 
*TALBE B5: Parameters: Model with assortative mating
preserve 
drop if substr(_varname,1,1)=="b"
drop if substr(_varname,1,1)=="B"
drop if substr(_varname,1,3)=="pct"
drop if inlist(_varname,"yearmin","yearmax")
drop if inlist(_varname,"CovEspEspy","CovEepEspy","CovEpEspy")
reshape wide V_*, i(varnum) j(modelnum) string  //Reshape wide (mother & father next to each other instead of stacked)
foreach yy in 1985_1995 2008_2019 {
 rename V_`yy'SE5b  SE_M_`yy'
 rename V_`yy'SE5bF SE_F_`yy'
}
foreach yy in 1985_1995 2008_2018 {
 rename V_`yy'5b  US_M_`yy'
 rename V_`yy'5bF US_F_`yy'
}
aorder
order varnum _varname, first
texsave  _varname SE_* US_* using ${tabfig}/tableB5.tex, landscape align(l r r r r r r r r ) replace  title(Parameters: Model with Assortative Mating) //frag
restore


***TABLE B4: Decomposition of IRP for average parent income measure (%)
use ${swe_results}/tables_model5_SE.dta, clear
renvars , postfix(SE)
rename (varnumSE _varnameSE modelnumSE) (varnum _varname modelnum)
merge 1:1 varnum _varname modelnum using ${us_results}/tables_model5_US.dta, gen(merge) 
sort modelnum varnum
keep if modelnum=="5bavg"
*Keep early and late periods (omit middle) to save space 
//drop *1996*
*Drop variables and lines for MF Avg model 
drop if V_1985_1995SE==.
drop if V_1985_1995==.
tab merge 
drop merge
drop parent*
*Allow for full sample sizes to be displayed
format *SE %15.3f
*Table with percentages only (for paper)
preserve 
keep if inlist(_varname,"IGE","pct_Moth","pct_Fath","pct_AM", "pct_AMhc", "pct_AMeta") 
drop modelnum 
texsave _varname V_* using ${tabfig}/tableB4.tex, align(l r r r r ) replace title(Decomposition of IRP for average parent income measure) // frag 
restore

